if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, broom, skimr, devtools, ggpubr, mgcv, extrafont, mgcViz, here)
theme_set(theme_pubclean(base_size = 14))
# import
lesion <- read_csv(here("cache", "summarised_lesion_data.csv"))
weather <- read_csv(here("cache", "weather_summary.csv"))
dat <- left_join(lesion, weather, by = c("site" = "Location", "rep" = "Rep"))
For reproducibility purposes, use set.seed().
set.seed(27)
mod1 <-
gam(
mean_pot_count ~ s(distance, k = 5),
data = dat
)
summary(mod1)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ s(distance, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0919 0.0592 18.4 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.84 3.98 45.2 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.435 Deviance explained = 44.4%
## GCV = 0.82474 Scale est. = 0.80738 n = 230
mod2 <-
gam(
mean_pot_count ~ sum_rain+ s(distance, k = 5),
data = dat
)
summary(mod2)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ sum_rain + s(distance, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1685 0.0746 15.67 <0.0000000000000002 ***
## sum_rain -0.0731 0.0435 -1.68 0.094 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.83 3.98 45.5 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.439 Deviance explained = 45.1%
## GCV = 0.82197 Scale est. = 0.80113 n = 230
mod3 <-
gam(mean_pot_count ~ mws + s(distance, k = 5),
data = dat)
summary(mod3)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ mws + s(distance, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5928 0.1510 3.93 0.00012 ***
## mws 0.1383 0.0387 3.58 0.00043 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.85 3.98 48.5 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.463 Deviance explained = 47.4%
## GCV = 0.78717 Scale est. = 0.76714 n = 230
mod4 <-
gam(mean_pot_count ~ sum_rain + mws + s(distance, k = 5),
data = dat)
summary(mod4)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ sum_rain + mws + s(distance, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6641 0.1547 4.29 0.000026 ***
## sum_rain -0.0811 0.0424 -1.91 0.05694 .
## mws 0.1421 0.0385 3.69 0.00028 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.84 3.98 49 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.469 Deviance explained = 48.3%
## GCV = 0.78164 Scale est. = 0.75838 n = 230
mod5 <-
gam(
mean_pot_count ~ sum_rain + s(distance + mws, k = 5),
data = dat
)
## Warning in term[i] <- attr(terms(reformulate(term[i])), "term.labels"): number
## of items to replace is not a multiple of replacement length
summary(mod5)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ sum_rain + s(distance + mws, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1685 0.0746 15.67 <0.0000000000000002 ***
## sum_rain -0.0731 0.0435 -1.68 0.094 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.83 3.98 45.5 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.439 Deviance explained = 45.1%
## GCV = 0.82197 Scale est. = 0.80113 n = 230
mod6 <-
gam(
mean_pot_count ~ sum_rain + s(distance, k = 5) + s(mws, k = 5),
data = dat
)
summary(mod6)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ sum_rain + s(distance, k = 5) + s(mws, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0351 0.0788 13.13 <0.0000000000000002 ***
## sum_rain 0.0542 0.0553 0.98 0.33
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.88 3.99 54.5 < 0.0000000000000002 ***
## s(mws) 3.95 4.00 12.8 0.0000000014 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.54 Deviance explained = 55.8%
## GCV = 0.68596 Scale est. = 0.65664 n = 230
mod7 <-
gam(
mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5),
data = dat
)
summary(mod7)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0919 0.0534 20.4 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.88 3.99 54.7 < 0.0000000000000002 ***
## s(mws) 3.95 4.00 13.4 0.00000000051 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.54 Deviance explained = 55.6%
## GCV = 0.68285 Scale est. = 0.65663 n = 230
mod8 <-
gam(
mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain, k = 5),
data = dat
)
summary(mod8)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain,
## k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0919 0.0534 20.4 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.88 3.99 54.78 <0.0000000000000002 ***
## s(mws) 2.56 2.74 7.92 0.0038 **
## s(sum_rain) 2.14 2.21 5.63 0.0018 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.54 Deviance explained = 55.8%
## GCV = 0.68522 Scale est. = 0.65666 n = 230
mod9 <-
gam(
mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5),
data = dat
)
summary(mod9)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0919 0.0546 20 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.87 3.99 53.3 < 0.0000000000000002 ***
## s(sum_rain) 3.87 3.99 11.3 0.000000097 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.52 Deviance explained = 53.6%
## GCV = 0.71262 Scale est. = 0.68552 n = 230
mod10 <-
gam(
mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws,
data = dat
)
summary(mod10)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4298 0.1559 2.76 0.0063 **
## mws 0.1834 0.0405 4.53 0.0000097 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.88 3.99 54.5 < 0.0000000000000002 ***
## s(sum_rain) 2.08 2.27 13.2 0.0000012 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.529 Deviance explained = 54.3%
## GCV = 0.69678 Scale est. = 0.67267 n = 230
This is the same as mod8 but using family = tw(), see ?family.mgcv for more on the families. The Tweedie distribution is used where the distribution has a positive mass at zero, but is continuous unlike the Poisson distribution that requires count data. The data visualisation shows clearly that the mean pot count data have this shape.
mod11.0 <-
gam(
mean_pot_count ~ s(distance, k = 5) +
s(mws, k = 5) +
s(sum_rain, k = 5),
data = dat,
family = tw()
)
summary(mod11.0)
##
## Family: Tweedie(p=1.047)
## Link function: log
##
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain,
## k = 5)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2053 0.0488 -4.2 0.000038 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.17 3.63 79.02 < 0.0000000000000002 ***
## s(mws) 2.09 2.32 7.36 0.00025 ***
## s(sum_rain) 2.29 2.45 10.52 0.000011 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.651 Deviance explained = 61.5%
## -REML = 210.63 Scale est. = 0.36523 n = 230
Try fitting the same model using a different shrinkage smoother, ts is thin- plate splines.
mod11.1 <-
gam(
mean_pot_count ~ s(distance, k = 5, bs = "ts") +
s(mws, k = 5, bs = "ts") +
s(sum_rain, k = 5, bs = "ts"),
data = dat,
family = tw()
)
summary(mod11.1)
##
## Family: Tweedie(p=1.048)
## Link function: log
##
## Formula:
## mean_pot_count ~ s(distance, k = 5, bs = "ts") + s(mws, k = 5,
## bs = "ts") + s(sum_rain, k = 5, bs = "ts")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1998 0.0488 -4.1 0.000059 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 2.74 4 71.05 < 0.0000000000000002 ***
## s(mws) 1.06 4 7.31 0.00000000035 ***
## s(sum_rain) 2.16 4 11.84 0.00000000004 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.638 Deviance explained = 60.3%
## -REML = 218.69 Scale est. = 0.36646 n = 230
This model, same structure as mod11.0, uses thin-plate splines to shrink the coefficients of the smooth to zero.
Looking at mod11.1, mws is not significant as a smoothed term, and we’d suspect it was linear anyway, so add it as a linear term only.
mod12 <-
gam(
mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws,
data = dat,
family = tw()
)
summary(mod12)
##
## Family: Tweedie(p=1.048)
## Link function: log
##
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.7748 0.1293 -5.99 0.0000000083 ***
## mws 0.1586 0.0314 5.05 0.0000009032 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(distance) 3.15 3.62 79.9 < 0.0000000000000002 ***
## s(sum_rain) 2.29 2.54 20.6 0.000000061 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.643 Deviance explained = 60.4%
## -REML = 212.62 Scale est. = 0.36632 n = 230
models <- list(mod1 = mod1,
mod2 = mod2,
mod3 = mod3,
mod4 = mod4,
mod5 = mod5,
mod6 = mod6,
mod7 = mod7,
mod8 = mod8,
mod9 = mod9,
mod10 = mod10,
mod11.0 = mod11.0,
mod11.1 = mod11.1,
mod12 = mod12
)
map_df(models, glance, .id = "model") %>%
arrange(AIC)
## # A tibble: 13 x 7
## model df logLik AIC BIC deviance df.residual
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mod11.0 8.54 -200. 422. 462. 91.4 221.
## 2 mod12 7.45 -208. 436. 471. 94.0 223.
## 3 mod11.1 6.96 -209. 438. 471. 94.4 223.
## 4 mod7 8.83 -273. 567. 600. 145. 221.
## 5 mod8 9.59 -273. 567. 604. 145. 220.
## 6 mod6 9.83 -273. 568. 605. 145. 220.
## 7 mod10 7.96 -277. 571. 602. 149. 222.
## 8 mod9 8.74 -278. 576. 610. 152. 221.
## 9 mod4 6.84 -291. 598. 625. 169. 223.
## 10 mod3 5.85 -293. 600. 623. 172. 224.
## 11 mod2 5.83 -298. 609. 633. 180. 224.
## 12 mod5 5.83 -298. 609. 633. 180. 224.
## 13 mod1 4.84 -299. 610. 630. 182. 225.
enframe(c(
mod1 = summary(mod1)$r.sq,
mod2 = summary(mod2)$r.sq,
mod3 = summary(mod3)$r.sq,
mod4 = summary(mod4)$r.sq,
mod5 = summary(mod5)$r.sq,
mod6 = summary(mod6)$r.sq,
mod7 = summary(mod7)$r.sq,
mod8 = summary(mod8)$r.sq,
mod9 = summary(mod9)$r.sq,
mod10 = summary(mod10)$r.sq,
mod11.0 = summary(mod11.0)$r.sq,
mod11.1 = summary(mod11.1)$r.sq,
mod12 = summary(mod12)$r.sq
)) %>%
arrange(desc(value))
## # A tibble: 13 x 2
## name value
## <chr> <dbl>
## 1 mod11.0 0.651
## 2 mod12 0.643
## 3 mod11.1 0.638
## 4 mod7 0.540
## 5 mod6 0.540
## 6 mod8 0.540
## 7 mod10 0.529
## 8 mod9 0.520
## 9 mod4 0.469
## 10 mod3 0.463
## 11 mod2 0.439
## 12 mod5 0.439
## 13 mod1 0.435
cat("\nmod1\n")
##
## mod1
coef(mod1)
## (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4
## 1.0919 -10.6886 5.6207 -0.9475 -1.9453
cat("\nmod2\n")
##
## mod2
coef(mod2)
## (Intercept) sum_rain s(distance).1 s(distance).2 s(distance).3
## 1.16847 -0.07309 -10.28296 5.32884 -0.92761
## s(distance).4
## -1.92327
cat("\nmod3\n")
##
## mod3
coef(mod3)
## (Intercept) mws s(distance).1 s(distance).2 s(distance).3
## 0.5928 0.1383 -10.8580 5.7526 -0.9273
## s(distance).4
## -1.9768
cat("\nmod4\n")
##
## mod4
coef(mod4)
## (Intercept) sum_rain mws s(distance).1 s(distance).2
## 0.66411 -0.08111 0.14208 -10.41768 5.43580
## s(distance).3 s(distance).4
## -0.90480 -1.95363
cat("\nmod5\n")
##
## mod5
coef(mod5)
## (Intercept) sum_rain s(distance).1 s(distance).2 s(distance).3
## 1.16847 -0.07309 -10.28296 5.32884 -0.92761
## s(distance).4
## -1.92327
cat("\nmod6\n")
##
## mod6
coef(mod6)
## (Intercept) sum_rain s(distance).1 s(distance).2 s(distance).3
## 1.03513 0.05417 -11.53555 6.18414 -0.91012
## s(distance).4 s(mws).1 s(mws).2 s(mws).3 s(mws).4
## -2.04129 3.58007 8.88574 5.67541 1.30559
cat("\nmod7\n")
##
## mod7
coef(mod7)
## (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4
## 1.0919 -11.3110 6.0268 -0.8972 -2.0312
## s(mws).1 s(mws).2 s(mws).3 s(mws).4
## 3.4550 8.9379 5.8148 1.2085
cat("\nmod8\n")
##
## mod8
coef(mod8)
## (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4
## 1.0919 -11.5274 6.1781 -0.9076 -2.0443
## s(mws).1 s(mws).2 s(mws).3 s(mws).4 s(sum_rain).1
## 0.7218 0.3406 0.7937 0.8430 -0.5327
## s(sum_rain).2 s(sum_rain).3 s(sum_rain).4
## -0.3428 -1.5569 -0.5915
cat("\nmod9\n")
##
## mod9
coef(mod9)
## (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4
## 1.0919 -11.3267 6.0356 -0.9145 -2.0315
## s(sum_rain).1 s(sum_rain).2 s(sum_rain).3 s(sum_rain).4
## 48.5288 -28.2681 16.9986 1.4408
cat("\nmod10\n")
##
## mod10
coef(mod10)
## (Intercept) mws s(distance).1 s(distance).2 s(distance).3
## 0.4298 0.1834 -11.3882 6.0817 -0.9120
## s(distance).4 s(sum_rain).1 s(sum_rain).2 s(sum_rain).3 s(sum_rain).4
## -2.0375 -0.4626 0.1845 -1.2396 -0.3232
cat("\nmod11.0\n")
##
## mod11.0
coef(mod11.0)
## (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4
## -0.2053 -2.5038 1.0412 -0.2882 -1.0477
## s(mws).1 s(mws).2 s(mws).3 s(mws).4 s(sum_rain).1
## 0.2746 0.1912 0.5692 0.4779 -0.7130
## s(sum_rain).2 s(sum_rain).3 s(sum_rain).4
## -0.2235 -1.5702 -0.5669
cat("\nmod11.1\n")
##
## mod11.1
coef(mod11.1)
## (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4
## -0.19984 -1.46252 0.36842 -0.28290 -0.94715
## s(mws).1 s(mws).2 s(mws).3 s(mws).4 s(sum_rain).1
## 0.01161 0.01202 0.03989 0.23732 -0.52269
## s(sum_rain).2 s(sum_rain).3 s(sum_rain).4
## 0.32671 -1.38663 -0.30584
cat("\nmod12\n")
##
## mod12
coef(mod12)
## (Intercept) mws s(distance).1 s(distance).2 s(distance).3
## -0.7748 0.1586 -2.4549 1.0081 -0.2937
## s(distance).4 s(sum_rain).1 s(sum_rain).2 s(sum_rain).3 s(sum_rain).4
## -1.0442 -1.2567 0.6562 -1.6350 -0.3442
anova(mod1,
mod2,
mod3,
mod4,
mod5,
mod6,
mod7,
mod8,
mod9,
mod10,
mod11.0,
mod11.1,
mod12,
test = "F")
## Analysis of Deviance Table
##
## Model 1: mean_pot_count ~ s(distance, k = 5)
## Model 2: mean_pot_count ~ sum_rain + s(distance, k = 5)
## Model 3: mean_pot_count ~ mws + s(distance, k = 5)
## Model 4: mean_pot_count ~ sum_rain + mws + s(distance, k = 5)
## Model 5: mean_pot_count ~ sum_rain + s(distance + mws, k = 5)
## Model 6: mean_pot_count ~ sum_rain + s(distance, k = 5) + s(mws, k = 5)
## Model 7: mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5)
## Model 8: mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain,
## k = 5)
## Model 9: mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5)
## Model 10: mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws
## Model 11: mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain,
## k = 5)
## Model 12: mean_pot_count ~ s(distance, k = 5, bs = "ts") + s(mws, k = 5,
## bs = "ts") + s(sum_rain, k = 5, bs = "ts")
## Model 13: mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws
## Resid. Df Resid. Dev Df Deviance F Pr(>F)
## 1 225 182
## 2 224 180 0.99786 2.2 6.04 0.0148 *
## 3 224 172 0.00449 7.6 4651.00 0.000000010075 ***
## 4 223 169 0.99805 2.7 7.45 0.0069 **
## 5 224 180 -1.00254 -10.3 28.26 0.000000252645 ***
## 6 220 145 4.00874 35.0 23.92 < 0.0000000000000002 ***
## 7 221 145 -1.00051 -0.7 1.79 0.1818
## 8 220 145 0.94424 0.5 1.42 0.2329
## 9 221 152 -0.95630 -6.9 19.87 0.000018302234 ***
## 10 222 149 -0.71152 2.3
## 11 220 400 1.99362 -250.2
## 12 221 418 -1.70062 -18.5 29.73 0.000000000075 ***
## 13 221 416 0.31889 2.2 19.15 0.0028 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# generic function to print GAMs using mgcViz
p_gam <- function(x) {
plot(x, allTerms = T) +
l_points() +
l_fitLine(linetype = 1) +
l_ciLine(linetype = 3) +
l_ciBar() +
l_rug()
}
print(p_gam(x = getViz(mod1)) +
ggtitle("s(Distance)"),
pages = 1)
print(p_gam(x = getViz(mod2)) +
ggtitle("s(Distance) + Precipitation"),
pages = 1)
print(p_gam(x = getViz(mod3)) +
ggtitle("s(Distance) + Windspeed"),
pages = 1)
print(p_gam(x = getViz(mod4)) +
ggtitle("s(Distance) + Windspeed + Precipitation"),
pages = 1)
print(p_gam(x = getViz(mod5)) +
ggtitle("s(Distance + Windspeed) + Precipitation"),
pages = 1)
print(p_gam(x = getViz(mod6)) +
ggtitle("s(Distance) + s(Windspeed) + Precipitation"),
pages = 1)
print(p_gam(x = getViz(mod7)) +
ggtitle("s(Distance) + s(Windspeed)"),
pages = 1)
print(p_gam(x = getViz(mod8)) +
ggtitle("s(Distance) + s(Windspeed) + s(Precipitation)"),
pages = 1)
print(p_gam(x = getViz(mod9)) +
ggtitle("s(Distance) + s(Precipitation)"),
pages = 1)
print(p_gam(x = getViz(mod10)) +
ggtitle("s(Distance) + s(Precipitation) + Windspeed"),
pages = 1)
print(p_gam(x = getViz(mod11.0)) +
ggtitle("s(Distance) + s(Windspeed) + s(Precipitation), family = tw()"),
pages = 1)
print(
p_gam(x = getViz(mod11.1)) +
ggtitle(
"s(Distance, bs = 'ts') + s(Windspeed, bs = 'ts')\n+ s(Precipitation, bs = 'ts'), family = tw()"
),
pages = 1
)
print(
p_gam(x = getViz(mod12)) +
ggtitle("s(Distance) + s(Precipitation) + Windspeed, family = tw()"),
pages = 1
)
gam.check(mod11.0)
##
## Method: REML Optimizer: outer newton
## full convergence after 9 iterations.
## Gradient range [-0.00003951,0.00000204]
## (score 210.6 & scale 0.3652).
## Hessian positive definite, eigenvalue range [0.3606,1703].
## Model rank = 13 / 13
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(distance) 4.00 3.17 0.74 <0.0000000000000002 ***
## s(mws) 4.00 2.09 0.78 <0.0000000000000002 ***
## s(sum_rain) 4.00 2.29 0.76 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
This model, mod11.0, mean_pot_count ~ s(Distance) + s(Windspeed) + s(Precipitation) - family = tw(), lacks enough numbers of basis functions for two of three predictors, distance, wsp, but it’s close. Realistically we need to increase the k values for these predictors, but we’re lacking the data to do this.